1 /**
2  * Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 private import containers.internal.hash : generateHash, hashToIndex;
11 private import containers.internal.node : shouldAddGCRange;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 private import std.traits : isBasicType;
14 
15 /**
16  * Hash Set.
17  * Params:
18  *     T = the element type
19  *     Allocator = the allocator to use. Defaults to `Mallocator`.
20  *     hashFunction = the hash function to use on the elements
21  *     supportGC = true if the container should support holding references to
22  *         GC-allocated memory.
23  */
24 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
25 	bool supportGC = shouldAddGCRange!T,
26 	bool storeHash = !isBasicType!T)
27 {
28 	this(this) @disable;
29 
30 	private import std.experimental.allocator.common : stateSize;
31 
32 	static if (stateSize!Allocator != 0)
33 	{
34 		this() @disable;
35 
36 		/**
37 		 * Use the given `allocator` for allocations.
38 		 */
39 		this(Allocator allocator)
40 		in
41 		{
42 			assert(allocator !is null, "Allocator must not be null");
43 		}
44 		do
45 		{
46 			this.allocator = allocator;
47 		}
48 
49 		/**
50 		 * Constructs a HashSet with an initial bucket count of bucketCount.
51 		 * bucketCount must be a power of two.
52 		 */
53 		this(size_t bucketCount, Allocator allocator)
54 		in
55 		{
56 			assert(allocator !is null, "Allocator must not be null");
57 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
58 		}
59 		do
60 		{
61 			this.allocator = allocator;
62 			initialize(bucketCount);
63 		}
64 	}
65 	else
66 	{
67 		/**
68 		 * Constructs a HashSet with an initial bucket count of bucketCount.
69 		 * bucketCount must be a power of two.
70 		 */
71 		this(size_t bucketCount)
72 		in
73 		{
74 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
75 		}
76 		do
77 		{
78 			initialize(bucketCount);
79 		}
80 
81 	}
82 
83 	~this()
84 	{
85 		import std.experimental.allocator : dispose;
86 		import core.memory : GC;
87 		static if (useGC)
88 			GC.removeRange(buckets.ptr);
89 		allocator.dispose(buckets);
90 	}
91 
92 	/**
93 	 * Removes all items from the set
94 	 */
95 	void clear()
96 	{
97 		foreach (ref bucket; buckets)
98 		{
99 			destroy(bucket);
100 			bucket = Bucket.init;
101 		}
102 		_length = 0;
103 	}
104 
105 	/**
106 	 * Removes the given item from the set.
107 	 * Returns: false if the value was not present
108 	 */
109 	bool remove(T value)
110 	{
111 		if (buckets.length == 0)
112 			return false;
113 		immutable Hash hash = hashFunction(value);
114 		immutable size_t index = hashToIndex(hash, buckets.length);
115 		static if (storeHash)
116 			immutable bool removed = buckets[index].remove(ItemNode(hash, value));
117 		else
118 			immutable bool removed = buckets[index].remove(ItemNode(value));
119 		if (removed)
120 			--_length;
121 		return removed;
122 	}
123 
124 	/**
125 	 * Returns: true if value is contained in the set.
126 	 */
127 	bool contains(T value) inout
128 	{
129 		return (value in this) !is null;
130 	}
131 
132 	/**
133 	 * Supports $(B a in b) syntax
134 	 */
135 	inout(T)* opBinaryRight(string op)(T value) inout if (op == "in")
136 	{
137 		if (buckets.length == 0 || _length == 0)
138 			return null;
139 		immutable Hash hash = hashFunction(value);
140 		immutable index = hashToIndex(hash, buckets.length);
141 		return buckets[index].get(value, hash);
142 	}
143 
144 	/**
145 	 * Inserts the given item into the set.
146 	 * Params: value = the value to insert
147 	 * Returns: true if the value was actually inserted, or false if it was
148 	 *     already present.
149 	 */
150 	bool insert(T value)
151 	{
152 		if (buckets.length == 0)
153 			initialize(4);
154 		Hash hash = hashFunction(value);
155 		immutable size_t index = hashToIndex(hash, buckets.length);
156 		static if (storeHash)
157 			auto r = buckets[index].insert(ItemNode(hash, value));
158 		else
159 			auto r = buckets[index].insert(ItemNode(value));
160 		if (r)
161 			++_length;
162 		if (shouldRehash)
163 			rehash();
164 		return r;
165 	}
166 
167 	/// ditto
168 	bool opOpAssign(string op)(T item) if (op == "~")
169 	{
170 		return insert(item);
171 	}
172 
173 	/// ditto
174 	alias put = insert;
175 
176 	/// ditto
177 	alias insertAnywhere = insert;
178 
179 	/**
180 	 * Returns: true if the set has no items
181 	 */
182 	bool empty() const nothrow pure @nogc @safe @property
183 	{
184 		return _length == 0;
185 	}
186 
187 	/**
188 	 * Returns: the number of items in the set
189 	 */
190 	size_t length() const nothrow pure @nogc @safe @property
191 	{
192 		return _length;
193 	}
194 
195 	/**
196 	 * Forward range interface
197 	 */
198 	auto opSlice(this This)() nothrow @nogc @trusted
199 	{
200 		return Range!(This)(&this);
201 	}
202 
203 private:
204 
205 	import containers.internal.element_type : ContainerElementType;
206 	import containers.internal.mixins : AllocatorState;
207 	import containers.internal.node : shouldAddGCRange, FatNodeInfo;
208 	import containers.internal.storage_type : ContainerStorageType;
209 	import std.traits : isPointer;
210 
211 	alias LengthType = ubyte;
212 	alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof);
213 	enum ITEMS_PER_NODE = N[0];
214 	static assert(LengthType.max > ITEMS_PER_NODE);
215 	enum bool useGC = supportGC && shouldAddGCRange!T;
216 	alias Hash = typeof({ T v = void; return hashFunction(v); }());
217 
218 	void initialize(size_t bucketCount)
219 	{
220 		import core.memory : GC;
221 		import std.experimental.allocator : makeArray;
222 
223 		makeBuckets(bucketCount);
224 		static if (useGC)
225 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
226 	}
227 
228 	static struct Range(ThisT)
229 	{
230 		this(ThisT* t)
231 		{
232 			foreach (i, ref bucket; t.buckets)
233 			{
234 				bucketIndex = i;
235 				if (bucket.root !is null)
236 				{
237 					currentNode = cast(Bucket.BucketNode*) bucket.root;
238 					break;
239 				}
240 			}
241 			this.t = t;
242 		}
243 
244 		bool empty() const nothrow @safe @nogc @property
245 		{
246 			return currentNode is null;
247 		}
248 
249 		ET front() nothrow @safe @nogc @property
250 		{
251 			return cast(ET) currentNode.items[nodeIndex].value;
252 		}
253 
254 		void popFront() nothrow @trusted @nogc
255 		{
256 			if (nodeIndex + 1 < currentNode.l)
257 			{
258 				++nodeIndex;
259 				return;
260 			}
261 			else
262 			{
263 				nodeIndex = 0;
264 				if (currentNode.next is null)
265 				{
266 					++bucketIndex;
267 					while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null)
268 						++bucketIndex;
269 					if (bucketIndex < t.buckets.length)
270 						currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root;
271 					else
272 						currentNode = null;
273 				}
274 				else
275 				{
276 					currentNode = currentNode.next;
277 					assert(currentNode.l > 0, "Empty node");
278 				}
279 			}
280 		}
281 
282 	private:
283 		alias ET = ContainerElementType!(ThisT, T);
284 		ThisT* t;
285 		Bucket.BucketNode* currentNode;
286 		size_t bucketIndex;
287 		size_t nodeIndex;
288 	}
289 
290 	void makeBuckets(size_t bucketCount)
291 	{
292 		import std.experimental.allocator : makeArray;
293 
294 		static if (stateSize!Allocator == 0)
295 			buckets = allocator.makeArray!Bucket(bucketCount);
296 		else
297 		{
298 			import std.conv:emplace;
299 
300 			buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount);
301 			foreach (ref bucket; buckets)
302 				emplace!Bucket(&bucket, allocator);
303 		}
304 	}
305 
306 	bool shouldRehash() const pure nothrow @safe @nogc
307 	{
308 		immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE;
309 		return (numberOfNodes / cast(float) buckets.length) > 0.75f;
310 	}
311 
312 	void rehash() @trusted
313 	{
314 		import std.experimental.allocator : makeArray, dispose;
315 		import core.memory : GC;
316 
317 		immutable size_t newLength = buckets.length << 1;
318 		Bucket[] oldBuckets = buckets;
319 		makeBuckets(newLength);
320 		assert (buckets, "Bucket reallocation failed");
321 		assert (buckets.length == newLength, "Bucket reallocation size mismatch");
322 		static if (useGC)
323 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
324 		foreach (ref const bucket; oldBuckets)
325 		{
326 			for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next)
327 			{
328 				for (size_t i = 0; i < node.l; ++i)
329 				{
330 					static if (storeHash)
331 					{
332 						immutable Hash hash = node.items[i].hash;
333 						size_t index = hashToIndex(hash, buckets.length);
334 						buckets[index].insert(ItemNode(hash, node.items[i].value));
335 					}
336 					else
337 					{
338 						immutable Hash hash = hashFunction(node.items[i].value);
339 						size_t index = hashToIndex(hash, buckets.length);
340 						buckets[index].insert(ItemNode(node.items[i].value));
341 					}
342 				}
343 			}
344 		}
345 		static if (useGC)
346 			GC.removeRange(oldBuckets.ptr);
347 		allocator.dispose(oldBuckets);
348 	}
349 
350 	static struct Bucket
351 	{
352 		this(this) @disable;
353 
354 		static if (stateSize!Allocator != 0)
355 		{
356 			this(Allocator allocator)
357 			{
358 				this.allocator = allocator;
359 			}
360 			this() @disable;
361 		}
362 
363 		~this()
364 		{
365 			import core.memory : GC;
366 			import std.experimental.allocator : dispose;
367 
368 			BucketNode* current = root;
369 			BucketNode* previous;
370 			while (true)
371 			{
372 				if (previous !is null)
373 				{
374 					static if (useGC)
375 						GC.removeRange(previous);
376 					allocator.dispose(previous);
377 				}
378 				previous = current;
379 				if (current is null)
380 					break;
381 				current = current.next;
382 			}
383 		}
384 
385 		static struct BucketNode
386 		{
387 			ContainerStorageType!(T)* get(ItemNode n)
388 			{
389 				foreach (ref item; items[0 .. l])
390 				{
391 					static if (storeHash)
392 					{
393 						static if (isPointer!T)
394 						{
395 							if (item.hash == n.hash && *item.value == *n.value)
396 								return &item.value;
397 						}
398 						else
399 						{
400 							if (item.hash == n.hash && item.value == n.value)
401 								return &item.value;
402 						}
403 					}
404 					else
405 					{
406 						static if (isPointer!T)
407 						{
408 							if (*item.value == *n.value)
409 								return &item.value;
410 						}
411 						else
412 						{
413 							if (item.value == n.value)
414 								return &item.value;
415 						}
416 					}
417 				}
418 				return null;
419 			}
420 
421 			void insert(ItemNode n)
422 			{
423 				items[l] = n;
424 				++l;
425 			}
426 
427 			bool remove(ItemNode n)
428 			{
429 				import std.algorithm : SwapStrategy, remove;
430 
431 				foreach (size_t i, ref node; items[0 .. l])
432 				{
433 					static if (storeHash)
434 					{
435 						static if (isPointer!T)
436 							immutable bool matches = node.hash == n.hash && *node.value == *n.value;
437 						else
438 							immutable bool matches = node.hash == n.hash && node.value == n.value;
439 					}
440 					else
441 					{
442 						static if (isPointer!T)
443 							immutable bool matches = *node.value == *n.value;
444 						else
445 							immutable bool matches = node.value == n.value;
446 					}
447 					if (matches)
448 					{
449 						items[0 .. l].remove!(SwapStrategy.unstable)(i);
450 						l--;
451 						return true;
452 					}
453 				}
454 				return false;
455 			}
456 
457 			BucketNode* next;
458 			LengthType l;
459 			ItemNode[ITEMS_PER_NODE] items;
460 		}
461 
462 		bool insert(ItemNode n)
463 		{
464 			import core.memory : GC;
465 			import std.experimental.allocator : make;
466 
467 			BucketNode* hasSpace = null;
468 			for (BucketNode* current = root; current !is null; current = current.next)
469 			{
470 				if (current.get(n) !is null)
471 					return false;
472 				if (current.l < current.items.length)
473 					hasSpace = current;
474 			}
475 			if (hasSpace !is null)
476 				hasSpace.insert(n);
477 			else
478 			{
479 				BucketNode* newNode = make!BucketNode(allocator);
480 				static if (useGC)
481 					GC.addRange(newNode, BucketNode.sizeof);
482 				newNode.insert(n);
483 				newNode.next = root;
484 				root = newNode;
485 			}
486 			return true;
487 		}
488 
489 		bool remove(ItemNode n)
490 		{
491 			import core.memory : GC;
492 			import std.experimental.allocator : dispose;
493 
494 			BucketNode* current = root;
495 			BucketNode* previous;
496 			while (current !is null)
497 			{
498 				immutable removed = current.remove(n);
499 				if (removed)
500 				{
501 					if (current.l == 0)
502 					{
503 						if (previous !is null)
504 							previous.next = current.next;
505 						else
506 							root = current.next;
507 						static if (useGC)
508 							GC.removeRange(current);
509 						allocator.dispose(current);
510 					}
511 					return true;
512 				}
513 				previous = current;
514 				current = current.next;
515 			}
516 			return false;
517 		}
518 
519 		inout(T)* get(T value, Hash hash) inout
520 		{
521 			for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next)
522 			{
523 				static if (storeHash)
524 				{
525 					auto v = current.get(ItemNode(hash, value));
526 				}
527 				else
528 				{
529 					auto v = current.get(ItemNode(value));
530 				}
531 
532 				if (v !is null)
533 					return cast(typeof(return)) v;
534 			}
535 			return null;
536 		}
537 
538 		BucketNode* root;
539 		mixin AllocatorState!Allocator;
540 	}
541 
542 	static struct ItemNode
543 	{
544 		bool opEquals(ref const T v) const
545 		{
546 			static if (isPointer!T)
547 				return *v == *value;
548 			else
549 				return v == value;
550 		}
551 
552 		bool opEquals(ref const ItemNode other) const
553 		{
554 			static if (storeHash)
555 				if (other.hash != hash)
556 					return false;
557 			static if (isPointer!T)
558 				return *other.value == *value;
559 			else
560 				return other.value == value;
561 		}
562 
563 		static if (storeHash)
564 			Hash hash;
565 		ContainerStorageType!T value;
566 
567 		static if (storeHash)
568 		{
569 			this(Z)(Hash nh, Z nv)
570 			{
571 				this.hash = nh;
572 				this.value = nv;
573 			}
574 		}
575 		else
576 		{
577 			this(Z)(Z nv)
578 			{
579 				this.value = nv;
580 			}
581 		}
582 	}
583 
584 	mixin AllocatorState!Allocator;
585 	Bucket[] buckets;
586 	size_t _length;
587 }
588 
589 ///
590 version(emsi_containers_unittest) unittest
591 {
592 	import std.algorithm : canFind;
593 	import std.array : array;
594 	import std.range : walkLength;
595 	import std.uuid : randomUUID;
596 
597 	auto s = HashSet!string(16);
598 	assert(s.remove("DoesNotExist") == false);
599 	assert(!s.contains("nonsense"));
600 	assert(s.put("test"));
601 	assert(s.contains("test"));
602 	assert(!s.put("test"));
603 	assert(s.contains("test"));
604 	assert(s.length == 1);
605 	assert(!s.contains("nothere"));
606 	s.put("a");
607 	s.put("b");
608 	s.put("c");
609 	s.put("d");
610 	string[] strings = s[].array;
611 	assert(strings.canFind("a"));
612 	assert(strings.canFind("b"));
613 	assert(strings.canFind("c"));
614 	assert(strings.canFind("d"));
615 	assert(strings.canFind("test"));
616 	assert(*("a" in s) == "a");
617 	assert(*("b" in s) == "b");
618 	assert(*("c" in s) == "c");
619 	assert(*("d" in s) == "d");
620 	assert(*("test" in s) == "test");
621 	assert(strings.length == 5);
622 	assert(s.remove("test"));
623 	assert(s.length == 4);
624 	s.clear();
625 	assert(s.length == 0);
626 	assert(s.empty);
627 	s.put("abcde");
628 	assert(s.length == 1);
629 	foreach (i; 0 .. 10_000)
630 	{
631 		s.put(randomUUID().toString);
632 	}
633 	assert(s.length == 10_001);
634 
635 	// Make sure that there's no range violation slicing an empty set
636 	HashSet!int e;
637 	foreach (i; e[])
638 		assert(i > 0);
639 
640 	enum MAGICAL_NUMBER = 600_000;
641 
642 	HashSet!int f;
643 	foreach (i; 0 .. MAGICAL_NUMBER)
644 		assert(f.insert(i));
645 	assert(f.length == f[].walkLength);
646 	foreach (i; 0 .. MAGICAL_NUMBER)
647 		assert(i in f);
648 	foreach (i; 0 .. MAGICAL_NUMBER)
649 		assert(f.remove(i));
650 	foreach (i; 0 .. MAGICAL_NUMBER)
651 		assert(!f.remove(i));
652 
653 	HashSet!int g;
654 	foreach (i; 0 .. MAGICAL_NUMBER)
655 		assert(g.insert(i));
656 
657 	static struct AStruct
658 	{
659 		int a;
660 		int b;
661 	}
662 
663 	HashSet!(AStruct*, Mallocator, a => a.a) fred;
664 	fred.insert(new AStruct(10, 10));
665 	auto h = new AStruct(10, 10);
666 	assert(h in fred);
667 }
668 
669 version(emsi_containers_unittest) unittest
670 {
671 	static class Foo
672 	{
673 		string name;
674 	}
675 
676 	hash_t stringToHash(string str) @safe pure nothrow @nogc
677 	{
678 		hash_t hash = 5381;
679 		return hash;
680 	}
681 
682 	hash_t FooToHash(Foo e) pure @safe nothrow @nogc
683 	{
684 		return stringToHash(e.name);
685 	}
686 
687 	HashSet!(Foo, Mallocator, FooToHash) hs;
688 	auto f = new Foo;
689 	hs.insert(f);
690 	assert(f in hs);
691 	auto r = hs[];
692 }
693 
694 version(emsi_containers_unittest) unittest
695 {
696 	static class Foo
697 	{
698 		string name;
699 		this(string n) { this.name = n; }
700 		bool opEquals(const Foo of) const {
701 			if(of !is null) { return this.name == of.name; }
702 			else { return false; }
703 		}
704 	}
705 
706 	hash_t stringToHash(string str) @safe pure nothrow @nogc
707 	{
708 		hash_t hash = 5381;
709 		return hash;
710 	}
711 
712 	hash_t FooToHash(in Foo e) pure @safe nothrow @nogc
713 	{
714 		return stringToHash(e.name);
715 	}
716 
717 	string foo = "foo";
718 	HashSet!(const(Foo), Mallocator, FooToHash) hs;
719 	const(Foo) f = new const Foo(foo);
720 	hs.insert(f);
721 	assert(f in hs);
722 	auto r = hs[];
723 	assert(!r.empty);
724 	auto fro = r.front;
725 	assert(fro.name == foo);
726 }
727 
728 version(emsi_containers_unittest) unittest
729 {
730 	hash_t maxCollision(ulong x)
731 	{
732 		return 0;
733 	}
734 
735 	HashSet!(ulong, Mallocator, maxCollision) set;
736 	auto ipn = set.ITEMS_PER_NODE; // Need this info to trigger this bug, so I made it public
737 	assert(ipn > 1); // Won't be able to trigger this bug if there's only 1 item per node
738 
739 	foreach (i; 0 .. 2 * ipn - 1)
740 		set.insert(i);
741 
742 	assert(0 in set); // OK
743 	bool ret = set.insert(0); // 0 should be already in the set
744 	assert(!ret); // Fails
745 	assert(set.length == 2 * ipn - 1); // Fails
746 }
747 
748 version(emsi_containers_unittest) unittest
749 {
750 	import std.experimental.allocator.showcase;
751 	auto allocator = mmapRegionList(1024);
752 	auto set = HashSet!(ulong, typeof(&allocator))(0x1000, &allocator);
753 	set.insert(124);
754 }